'''
Author: 奔跑的乌龟
Date: 2020-12-05 13:25:53
LastEditTime: 2020-12-05 18:19:08
email: 435327238@qq.com
FilePath: \waibaoe:\爬虫12-4\Db.py
'''
import requests
from lxml import etree
import html
import time
import gethtml
import Db

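# Parse the fetched HTML page and extract the text of its content div
# (no database insert is performed by the function below).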
def analysisHtml(url, booknumber=None):
    """Fetch ``url`` and return the text found inside ``<div id="content">``.

    Args:
        url: Address of the page, fetched through ``gethtml.getHtml``.
        booknumber: Optional. Accepted only because ``produceurl`` passes the
            current book number as a second positional argument; it is not
            used by this function.

    Returns:
        The concatenation of every text node directly under the content div,
        or the sentinel string ``"1"`` when the page cannot be fetched,
        decoded or parsed, or when the div has no text nodes.
    """
    try:
        res = gethtml.getHtml(url)

        # Re-encode the decoded text as ISO-8859-1 and decode it as UTF-8.
        # NOTE(review): presumably the response text was decoded as
        # ISO-8859-1 although the page is UTF-8 -- confirm against gethtml.
        selector = etree.HTML(res.text.encode('ISO-8859-1').decode('UTF-8'))

        fragments = selector.xpath('//div[@id="content"]/text()')
        if not fragments:
            # Same outcome as before: a page without content is reported
            # with the "1" sentinel.
            return "1"

        # Join each text node exactly once (the first node used to be
        # emitted twice).
        return ''.join(str(fragment) for fragment in fragments)
    except Exception:
        # Best-effort scraping: any ordinary failure means "skip this page".
        # KeyboardInterrupt / SystemExit are deliberately not caught so the
        # crawl can still be stopped.
        return "1"

def produceurl():
    """Crawl book pages one by one, starting from the stored maximum number.

    The starting number is read from the ``chapter`` table through
    ``Db.query``; if that lookup fails for any reason the crawl starts at 1.
    Pages are requested for every number from the start up to (but not
    including) ``stop``. A page for which ``analysisHtml`` returns the
    sentinel ``"1"`` is reported as missing and skipped.

    Unlike the previous ``while True`` loop, the upper bound is enforced on
    every iteration, so a run of missing pages can no longer push the crawl
    past ``stop`` indefinitely. If the starting number is already at or above
    ``stop``, nothing is fetched.
    """
    try:
        # Look up the row holding the largest booknumber already stored.
        value = "SELECT * FROM chapter WHERE  booknumber=(SELECT MAX(booknumber) FROM chapter);"
        rows = Db.query(value)
        # NOTE(review): assumes column index 1 of the row is booknumber --
        # confirm against the table schema. int() guards against the driver
        # returning a non-int numeric type.
        start = int(rows[0][1])
    except Exception:
        # Empty table, missing DB, unexpected row shape: start from scratch.
        start = 1

    stop = 120000
    for book_number in range(start, stop):
        url = 'http://www.aomolit.com/t/' + str(book_number)
        # analysisHtml is declared with a single ``url`` parameter.
        result = analysisHtml(url)
        if result == "1":
            print("该数据不存在，将跳过该数据！")
            continue
        print("成功爬取链接是："+url)
